In [1]:
import altair as alt
import pandas as pd
import numpy as np
import os
from toolz.curried import pipe
from vega_datasets import data
from altair import datum
# Altair refuses to embed more than 5000 rows by default; lift that cap
# because the charts below embed whole DataFrames.
# Reference: https://altair-viz.github.io/user_guide/data_transformers.html
alt.data_transformers.disable_max_rows()

# Read the cleaned data file and store the study year as an integer column
df = pd.read_csv(
    "../../data/processed/cleaned_financial_data.csv",
    low_memory=True,
).astype({'Year_of_Study': int})
Visualization 1 - Income Ranges and Vote Preference Map
In [2]:
# Columns shared by the income map, the linked bar charts and the bubble matrix.
# Rows with a missing value in any of these columns are dropped.
# .copy() makes df_income an independent frame so that adding Income_Level
# below does not raise pandas' SettingWithCopyWarning.
df_income = df[[
    "Year_of_Study",
    "Voting_Preference",
    "State_Code_FIPS",
    "Income_Group",
    "Unemployment_Past_Year",
    "Unemployment_Next_Year"
]].dropna().copy()

# Ordinal code for each income bracket (1 = lowest, 4 = highest)
income_mapping = {
    'Low Income': 1,
    'Middle Income': 2,
    'Upper Middle Income': 3,
    'High Income': 4
}

# Add a numeric Income_Level column derived from Income_Group (the original
# Income_Group column is kept). The int64 cast raises if any Income_Group
# value is not a key of income_mapping, because the unmapped NaN cannot be
# converted to an integer.
df_income['Income_Level'] = df_income['Income_Group'].map(income_mapping).astype('int64')

# Rescale the 1..4 codes linearly onto 0..100 (1 -> 0, 4 -> 100)
df_income['Income_Level'] = (df_income['Income_Level'] - 1) / 3 * 100
# Step 1: keep only Democrat / Republican rows and count them per state, year and party
is_major_party = df_income['Voting_Preference'].isin(['Democrat', 'Republican'])
vote_counts = (
    df_income.loc[is_major_party]
    .groupby(['State_Code_FIPS', 'Year_of_Study', 'Voting_Preference'])
    .size()
    .reset_index(name='Count')
)

# Step 2: spread the party counts into one column per party.
# A party with no rows for a given state/year ends up as NaN in that cell.
vote_pivot = vote_counts.pivot(
    index=['State_Code_FIPS', 'Year_of_Study'],
    columns='Voting_Preference',
    values='Count',
)
# Step 3: Create a new column 'Dominant_Party' by comparing the two counts
def determine_party(row):
    """Return the party with the larger count in ``row``.

    Parameters
    ----------
    row : mapping-like (pandas Series or dict)
        Expected to carry 'Democrat' and 'Republican' counts. A count that is
        absent or NaN (the pivot leaves NaN when a party has no rows for a
        state/year) is treated as 0.

    Returns
    -------
    str
        'Democrat', 'Republican', or 'Neutral' when the counts are equal.
    """
    def _count(party):
        # Without this, a NaN count makes every comparison False and a state
        # where only one party appears would be reported as 'Neutral'.
        value = row.get(party, 0)
        return 0 if pd.isna(value) else value

    democrat = _count('Democrat')
    republican = _count('Republican')

    if democrat > republican:
        return 'Democrat'
    if republican > democrat:
        return 'Republican'
    return 'Neutral'
# Key columns identifying one state in one study year
state_year_keys = ['State_Code_FIPS', 'Year_of_Study']

# Label every state/year row of the pivot with its dominant party
vote_pivot['Dominant_Party'] = vote_pivot.apply(determine_party, axis=1)

# Step 4: move the index back into columns and keep only the keys plus the label
vote_pivot = vote_pivot.reset_index()
dominant_party_df = vote_pivot[state_year_keys + ['Dominant_Party']]

# Average every numeric column per state/year, then attach the dominant party.
# The left join keeps every averaged row even when no party label exists for it.
df_income_avg = (
    df_income
    .groupby(state_year_keys, as_index=False)
    .mean(numeric_only=True)
    .merge(dominant_party_df, on=state_year_keys, how='left')
)
# Long format: one row per state, year and dominant party, carrying the
# averaged Income_Level as 'Value' (with 'Attribute' == 'Income_Level')
df_melt_income = df_income_avg.melt(
    id_vars=['State_Code_FIPS', 'Year_of_Study', 'Dominant_Party'],
    value_vars=['Income_Level'],
    var_name='Attribute',
    value_name='Value',
)

# Back to wide format, one row per state, so the map can join it with a lookup
df_wide_income = df_melt_income.pivot(
    index='State_Code_FIPS',
    columns=['Attribute', 'Year_of_Study', 'Dominant_Party'],
    values='Value',
).reset_index()

# Flatten the three-level column index into plain strings, e.g.
# ('Income_Level', 2020, 'Democrat') -> 'Income_Level_2020_Democrat'.
# The restored index column has empty lower levels and keeps its own name.
df_wide_income.columns = [
    attribute if year == '' else f"{attribute}_{year}_{party}"
    for attribute, year, party in df_wide_income.columns.to_flat_index()
]

# Every column name (including State_Code_FIPS), reused by the map's
# transform_lookup and transform_fold
all_wide_cols = list(df_wide_income.columns)
# Year slider: 2004 to 2020 in steps of 4 (one stop per election cycle)
slider = alt.binding_range(
    min=2004, max=2020, step=4, name='Year: '
)

# Point selection holding the year currently chosen on the slider
select_year = alt.selection_point(
    fields=['Year_of_Study'],  # field the slider value is matched against
    bind=slider,
    value=2020,                # year shown initially
)

# Selecting a state on the map (matched by FIPS code).
# Altair 5 takes a boolean for `empty`; the old 'none' / 'all' strings are
# only accepted with a deprecation warning.
highlight_state = alt.selection_point(
    fields=['State_Code_FIPS'],
    value=1,       # initial selection
    empty=False,   # an empty selection matches nothing (was 'none')
)

# Checkbox switching the map between party colours and plain grey
checkbox_party = alt.binding_checkbox(name='Show Winning Party Color')
show_party_color = alt.selection_point(
    name="party_toggle",
    fields=['show'],
    bind=checkbox_party,
    value=[{'show': True}]  # box starts checked
)

# Legend-bound selection on the dominant party; toggle=False keeps the
# selection exclusive (one party at a time)
select_point = alt.selection_point(
    fields=['Dominant_Party'],
    bind='legend',
    toggle=False
)

# Cross-filter selections used by the two bar charts.
# empty=True (was 'all'): with nothing selected, every row matches.
party_selection = alt.selection_point(fields=['Voting_Preference'], empty=True)
income_selection = alt.selection_point(fields=['Income_Group'], empty=True)
# Base geometry: US state outlines from the vega_datasets topojson file
states = alt.topo_feature(data.us_10m.url, 'states')

# Outline styling: the selected state gets a thick, opaque gray border,
# every other state a thin, faint white one
state_stroke = alt.condition(highlight_state, alt.value('gray'), alt.value('white'))
state_stroke_width = alt.condition(highlight_state, alt.value(5), alt.value(2))
state_stroke_opacity = alt.condition(highlight_state, alt.value(1), alt.value(0.1))

# Fill: party colour while the checkbox is ticked, plain grey otherwise
party_fill = alt.condition(
    show_party_color,
    alt.Color(
        'Dominant_Party:N',
        title='Winning Party',
        scale=alt.Scale(
            domain=['Democrat', 'Republican', 'Neutral'],
            range=['#1f77b4', '#d62728', '#999999'],  # blue, red, gray
        ),
    ),
    alt.value('grey'),
)

# Fill opacity follows the averaged income level
income_opacity = alt.Opacity(
    'Value:Q',
    title='Avg Income Level',
    scale=alt.Scale(domain=[20, 60]),
    legend=None,
)

map_tooltip = [
    alt.Tooltip('id:O', title='State FIPS'),
    alt.Tooltip('Value:Q', title='Income Level'),
    alt.Tooltip('Year_of_Study:N', title='Year'),
    alt.Tooltip('Dominant_Party:N', title='Voting Preference'),
]

chart1_income = (
    alt.Chart(states)
    .mark_geoshape()
    # Attach the wide income table to each state shape via its FIPS code
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(df_wide_income, 'State_Code_FIPS', all_wide_cols),
    )
    # Turn every wide column into a (AttributeYear, Value) pair
    .transform_fold(
        fold=all_wide_cols,
        as_=['AttributeYear', 'Value'],
    )
    # Folded keys look like 'Income_Level_<year>_<party>'. Because
    # 'Income_Level' itself contains an underscore, the year is token 2 and
    # the party token 3; Attribute only captures the first token.
    .transform_calculate(
        Year_of_Study="parseInt(split(datum.AttributeYear, '_')[2])",
        Attribute="split(datum.AttributeYear, '_')[0]",
        Dominant_Party="split(datum.AttributeYear, '_')[3]",
        show="true",
    )
    # Keep only the rows for the year chosen on the slider
    .transform_filter(select_year)
    .encode(
        stroke=state_stroke,
        strokeWidth=state_stroke_width,
        strokeOpacity=state_stroke_opacity,
        color=party_fill,
        opacity=income_opacity,
        tooltip=map_tooltip,
    )
    .project(type='albersUsa')
    .properties(
        width=800,
        height=800,
        title='Average Income and Vote Preference by US State (2004–2020)',
    )
    .add_params(select_year, highlight_state, show_party_color)
)
# Bars keep their party colour while they match party_selection, else light gray
vote_bar_color = alt.condition(
    party_selection,
    alt.Color(
        'Voting_Preference:N',
        title='Party',
        scale=alt.Scale(domain=['Democrat', 'Republican'], range=['#1f77b4', '#d62728']),
    ),
    alt.value('lightgray'),
)

# Row counts per party, restricted to the selected year, state and income group
vote_bar_chart = (
    alt.Chart(df_income)
    .transform_filter(select_year)
    .transform_filter(highlight_state)
    .transform_filter(income_selection)
    .mark_bar()
    .encode(
        x=alt.X('Voting_Preference:N', title='Party', axis=alt.Axis(labelAngle=0)),
        y=alt.Y('count():Q', title='Number of Votes'),
        color=vote_bar_color,
        tooltip=[
            alt.Tooltip('Voting_Preference:N', title='Party'),
            alt.Tooltip('count():Q', title='Votes'),
        ],
    )
    .properties(width=100, height=250, title='Vote Count in Selected State')
    .add_params(party_selection)
)
# Display order of the income brackets, shared by the axis sort and colour scale
income_order = ['Low Income', 'Middle Income', 'Upper Middle Income', 'High Income']

# Bars keep their bracket colour while they match income_selection, else light gray
income_bar_color = alt.condition(
    income_selection,
    alt.Color(
        'Income_Group:N',
        title='Income Group',
        scale=alt.Scale(
            domain=income_order,
            range=['#e66101', '#fdb863', '#b2df8a', '#1a9641'],
        ),
    ),
    alt.value('lightgray'),
)

# Row counts per income bracket, restricted to the selected year, state and party
income_bar_chart = (
    alt.Chart(df_income)
    .transform_filter(select_year)
    .transform_filter(highlight_state)
    .transform_filter(party_selection)
    .mark_bar()
    .encode(
        x=alt.X(
            'Income_Group:N',
            title='Income Group',
            sort=income_order,
            axis=alt.Axis(labelAngle=-30),
        ),
        y=alt.Y('count():Q', title='Count of Respondents'),
        color=income_bar_color,
        tooltip=[
            alt.Tooltip('Income_Group:N', title='Income Group'),
            alt.Tooltip('count():Q', title='Respondent Count'),
        ],
    )
    .properties(width=100, height=250, title='Income Group Distribution')
    .add_params(income_selection)
)
Visualization 2 - Unemployment Perception vs. Voting Preferences Bubble Matrix
In [3]:
# Columns defining one cell of the bubble matrix (per year and state)
perception_keys = [
    'Year_of_Study',
    'State_Code_FIPS',
    'Unemployment_Past_Year',
    'Unemployment_Next_Year',
]

# Row count per matrix cell and voting preference
vote_aggregated = (
    df_income
    .groupby(perception_keys + ['Voting_Preference'])
    .size()
    .reset_index(name='count')
)

# Total rows in each matrix cell, aligned row-by-row with vote_aggregated
total_counts = vote_aggregated.groupby(perception_keys)["count"].transform("sum")

# Share of each voting preference within its cell, in percent
vote_aggregated["percentage"] = vote_aggregated["count"] / total_counts * 100

# Axis ordering for the past / future perception categories
past_une_order = ["Better", "Same", "Worse"]
next_une_order = list(past_une_order)
# Legend-bound party selection.
# Altair 5 takes a boolean for `empty`; the old "all" string is only accepted
# with a deprecation warning. True means an empty selection matches every row.
party_selector = alt.selection_point(
    name="bubble_party",
    fields=["Voting_Preference"],
    bind="legend",
    empty=True
)
# Bubbles keep their party colour while they match party_selection, else light gray
bubble_color = alt.condition(
    party_selection,
    alt.Color(
        'Voting_Preference:N',
        title="Party",
        scale=alt.Scale(domain=["Democrat", "Republican"], range=["blue", "red"]),
    ),
    alt.value('lightgray'),
)

# Bubble area encodes the percentage share within each perception cell
bubble_size = alt.Size(
    "percentage:Q",
    title='Percentage',
    scale=alt.Scale(range=[5, 500]),
)

# One small matrix per study year (stacked as rows), restricted to the party
# picked via party_selection and the state picked via highlight_state
chart2 = (
    alt.Chart(vote_aggregated)
    .mark_circle(opacity=0.5)
    .transform_filter(party_selection)
    .transform_filter(highlight_state)
    .encode(
        x=alt.X(
            'Unemployment_Past_Year:N',
            sort=past_une_order,
            title=None,
            axis=alt.Axis(labelAngle=-30),
        ),
        y=alt.Y("Unemployment_Next_Year:N", sort=next_une_order, title=None),
        color=bubble_color,
        size=bubble_size,
        row=alt.Row('Year_of_Study', title=None, header=alt.Header(labelOrient="top")),
        tooltip=["Voting_Preference", "percentage"],
    )
    .properties(
        width=90,
        height=90,
        title=["Voting Preference vs.", "Past and Future", "Unemployment Perception", "(2000–2020)"],
    )
)
# Faceted charts have no shared axis title, so the two axis captions are drawn
# as zero-sized text-only charts and concatenated around the matrix.
y_label_frame = pd.DataFrame({'text': ['Future Unemployment Perception']})
x_label_frame = pd.DataFrame({'text': ['Past Unemployment Perception']})

# Vertical caption placed to the left of the matrix (rotated 270 degrees)
y_label = (
    alt.Chart(y_label_frame)
    .mark_text(
        angle=270,
        align='center',
        baseline='middle',
        fontWeight='bold',
        fontSize=16,
        dx=-270,
        dy=0,
    )
    .encode(text='text:N')
    .properties(width=0, height=0)
)

# Horizontal caption placed underneath the matrix
x_label = (
    alt.Chart(x_label_frame)
    .mark_text(
        align='center',
        baseline='bottom',
        fontWeight='bold',
        fontSize=16,
    )
    .encode(text='text:N')
    .properties(width=0, height=0)
)

# Layout: [y caption] next to [matrix stacked above the x caption]
combined_chart2 = alt.hconcat(y_label, alt.vconcat(chart2, x_label))
Visualization 3 - Shifts in Economic Attitudes and Party Alignment Over Election Cycles
In [4]:
# Thermometer score columns analysed in this section
thermometer_vars = [
    "Thermometer_Business",
    "Thermometer_Labor_Union",
    "Thermometer_Middle_Class",
    "Thermometer_Welfare",
    "Thermometer_Poor"
]

# Identifier columns plus the thermometer scores; rows missing any of these
# values are dropped. The column list reuses thermometer_vars so the two
# cannot drift apart.
# .copy() gives an independent frame, so the State_Code_FIPS / State_Abbrev
# assignments made later in this cell do not trigger SettingWithCopyWarning.
df_thermometer = df[[
    "Year_of_Study",
    "State_Code_FIPS",
    "Voting_Preference",
    *thermometer_vars,
]].dropna().copy()
# Display label for each thermometer column.
# The original literal listed "Thermometer_Poor" twice; a repeated key in a
# dict literal silently overwrites the earlier entry, so it is listed once.
# "Thermometer_Unions" is not one of thermometer_vars; it is kept unchanged.
attribute_label_map = {
    "Thermometer_Unions": "Unions",
    "Thermometer_Poor": "The Poor",
    "Thermometer_Business": "Big Business",
    "Thermometer_Labor_Union": "Labor Union",
    "Thermometer_Middle_Class": "Middle Class",
    "Thermometer_Welfare": "Welfare",
}
# (numeric FIPS code, abbreviation) pairs for the entries used in this notebook
_fips_pairs = [
    (1, 'AL'), (2, 'AK'), (4, 'AZ'), (5, 'AR'), (6, 'CA'),
    (8, 'CO'), (9, 'CT'), (10, 'DE'), (11, 'DC'), (12, 'FL'),
    (13, 'GA'), (15, 'HI'), (16, 'ID'), (17, 'IL'), (18, 'IN'),
    (19, 'IA'), (20, 'KS'), (21, 'KY'), (22, 'LA'), (23, 'ME'),
    (24, 'MD'), (25, 'MA'), (26, 'MI'), (27, 'MN'), (28, 'MS'),
    (29, 'MO'), (30, 'MT'), (31, 'NE'), (32, 'NV'), (33, 'NH'),
    (34, 'NJ'), (35, 'NM'), (36, 'NY'), (37, 'NC'), (38, 'ND'),
    (39, 'OH'), (40, 'OK'), (41, 'OR'), (42, 'PA'), (44, 'RI'),
    (45, 'SC'), (46, 'SD'), (47, 'TN'), (48, 'TX'), (49, 'UT'),
    (50, 'VT'), (51, 'VA'), (53, 'WA'), (54, 'WV'), (55, 'WI'),
    (56, 'WY'),
]

# Lookup from the two-character, zero-padded FIPS string to the abbreviation
fips_to_state = {f"{code:02d}": abbrev for code, abbrev in _fips_pairs}
# Normalise the FIPS code to the two-character, zero-padded string form used
# as keys in fips_to_state, then derive the state abbreviation from it
fips_padded = df_thermometer['State_Code_FIPS'].astype(int).astype(str).str.zfill(2)
df_thermometer['State_Code_FIPS'] = fips_padded
df_thermometer['State_Abbrev'] = fips_padded.map(fips_to_state)

# Long format: one row per original row and thermometer column, with the
# score in 'Score' and a readable label in 'AttributeLabel'
df_melted = (
    df_thermometer
    .melt(
        id_vars=["Year_of_Study", "State_Abbrev", "Voting_Preference"],
        value_vars=thermometer_vars,
        var_name="Attribute",
        value_name="Score",
    )
    .assign(AttributeLabel=lambda long_df: long_df["Attribute"].map(attribute_label_map))
)
# Dropdown listing every thermometer label present in the melted data
dropdown = alt.binding_select(
    options=df_melted["AttributeLabel"].unique().tolist(),
    name="Select Thermometer: "
)

# Thermometer currently chosen in the dropdown (starts on "Big Business")
attitude_selector = alt.selection_point(
    fields=["AttributeLabel"],
    bind=dropdown,
    value="Big Business"
)

# State picked on the average-score bar chart.
# Altair 5 takes a boolean for `empty`; the old "all" string is only accepted
# with a deprecation warning. True means an empty selection matches every row.
state_selector = alt.selection_point(
    name="density_party",
    fields=["State_Abbrev"],
    empty=True
)

# NOTE: the party_selector used by the charts below is the legend-bound
# selection defined in the Visualization 2 cell.

# Horizontal brush over the x axis of the chart it is attached to
at_brush = alt.selection_interval(
    encodings=["x"],
    name="score_range",
    resolve='global'
)
# Bars are shaded by their mean score while they match state_selector, else light gray
avg_bar_color = alt.condition(
    state_selector,
    alt.Color('mean(Score):Q', scale=alt.Scale(scheme='oranges')),
    alt.value('lightgray'),
)

# Mean thermometer score per state, sorted by descending mean, restricted to
# the chosen thermometer, the chosen party and the brushed score range
thermo_avg_bar = (
    alt.Chart(df_melted)
    .transform_filter(attitude_selector)
    .transform_filter(party_selector)
    .transform_filter(at_brush)
    .mark_bar()
    .encode(
        x=alt.X('State_Abbrev:N', sort='-y', title='State'),
        y=alt.Y('mean(Score):Q', title='Average Score'),
        color=avg_bar_color,
        tooltip=[
            alt.Tooltip('State_Abbrev:N', title='State'),
            alt.Tooltip('mean(Score):Q', title='Average Score', format='.1f'),
        ],
    )
    .add_params(state_selector)
    .properties(
        width=1550,
        height=100,
        title='Average Thermometer Score by State',
    )
)
# Areas keep their party colour while they match party_selector, else light gray
density_color = alt.condition(
    party_selector,
    alt.Color(
        'Voting_Preference:N',
        title='Party (Clickable)',
        scale=alt.Scale(domain=["Democrat", "Republican"], range=["blue", "red"]),
    ),
    alt.value('lightgray'),
)

# Score density per party, one panel per study year, for the chosen
# thermometer and the state(s) matching state_selector
thermo_density_chart = (
    alt.Chart(df_melted)
    .transform_filter(attitude_selector)
    .transform_filter(state_selector)
    # Density estimate of Score, computed separately for each party and year
    .transform_density(
        "Score",
        groupby=["Voting_Preference", 'Year_of_Study'],
        as_=["Score", "Density"],
    )
    .mark_area(opacity=0.4)
    .encode(
        x=alt.X("Score:Q", scale=alt.Scale(domain=[0, 100]), title="Thermometer Score"),
        # stack=None overlays the party areas instead of stacking them
        y=alt.Y("Density:Q", title="Density", stack=None),
        color=density_color,
        column=alt.Column('Year_of_Study:O', title='Year'),
        tooltip=[
            alt.Tooltip("Voting_Preference:N", title="Party"),
            alt.Tooltip("Score:Q", format=".1f"),
            alt.Tooltip("Density:Q", format=".3f"),
        ],
    )
    .add_params(attitude_selector, party_selector, at_brush)
    .properties(
        width=500,
        height=150,
        title='Distribution of Scores by Party in Selected State',
    )
    .resolve_scale(x='shared')
)
# combined_charts
In [7]:
# Dashboard layout:
#   row 1: income map | (vote bars above income bars) | bubble matrix
#   row 2: average thermometer score per state
#   row 3: score density per party and year
#
# chart1_income is used as built: highlight_state is already registered on it
# by its own add_params call, so it is not added a second time here.
top_row = alt.hconcat(
    chart1_income,
    # The two bar charts colour different fields, so each keeps its own scale
    alt.vconcat(
        vote_bar_chart,
        income_bar_chart
    ).resolve_scale(color='independent'),
    combined_chart2,
).resolve_scale(
    color='independent',
    size='independent'
)

combined_charts = alt.vconcat(
    top_row,
    thermo_avg_bar,
    thermo_density_chart,
).resolve_scale(
    color='independent'
)
def apply_default_config(chart):
    """Return ``chart`` with the notebook's shared look-and-feel applied.

    Each ``configure_*`` call is applied in turn to the result of the
    previous one; the final result is returned.
    """
    # No border around the individual views
    styled = chart.configure_view(strokeWidth=0)
    styled = styled.configure_title(fontSize=18)
    styled = styled.configure_axis(labelFontSize=14, titleFontSize=16)
    styled = styled.configure_header(labelFontSize=15, titleFontSize=17)
    styled = styled.configure_legend(labelFontSize=14, titleFontSize=16)
    # No spacing between concatenated charts
    styled = styled.configure_concat(spacing=0)
    return styled
# Apply the shared styling; as the last expression of the cell, the styled
# dashboard is what the notebook displays as output.
apply_default_config(combined_charts)
Out[7]:
In [ ]: